Goal of the script

This script plots sensor data to visualise the measurements recorded throughout the tool function experiment. The variable of interest is the penetration depth.

# Input directory searched for the .xlsx data file (relative to the project directory)
dir_in <- "analysis_ST/derived_data/"
# Output directory into which the PDF plots are written
dir_out <- "analysis_ST/plots"

The input data must be located in ~/analysis_ST/derived_data/.
The plots will be saved in ~/analysis_ST/plots. The knit directory for this script is the project directory.


Load packages

# Packages required by this script, attached in the order listed
pack_to_load <- c("tidyverse", "R.utils", "openxlsx", "tools",
                  "patchwork", "doBy", "ggrepel", "ggplot2")
# Attach every package and print a named logical vector telling which ones
# could be loaded. vapply() is used instead of sapply() so that the result is
# guaranteed to be one TRUE/FALSE per package, whatever the input.
vapply(pack_to_load, library, logical(1), character.only = TRUE,
       logical.return = TRUE)
Warning: package 'tidyverse' was built under R version 4.1.3
Warning: package 'ggplot2' was built under R version 4.1.3
Warning: package 'tibble' was built under R version 4.1.3
Warning: package 'tidyr' was built under R version 4.1.3
Warning: package 'readr' was built under R version 4.1.3
Warning: package 'dplyr' was built under R version 4.1.3
Warning: package 'stringr' was built under R version 4.1.3
Warning: package 'forcats' was built under R version 4.1.3
Warning: package 'R.utils' was built under R version 4.1.3
Warning: package 'R.oo' was built under R version 4.1.3
Warning: package 'R.methodsS3' was built under R version 4.1.3
Warning: package 'openxlsx' was built under R version 4.1.3
Warning: package 'patchwork' was built under R version 4.1.3
Warning: package 'doBy' was built under R version 4.1.3
tidyverse   R.utils  openxlsx     tools patchwork      doBy   ggrepel   ggplot2 
     TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE      TRUE 

Get name, path and information of the file

# Full path of the .xlsx input file found in dir_in
data_file <- list.files(dir_in, pattern = "\\.xlsx$", full.names = TRUE)
# The script reads a single workbook and derives all output file names from
# it, so fail early with a clear message if none or several files match.
if (length(data_file) != 1L) {
  stop("Expected exactly one .xlsx file in '", dir_in, "', found ",
       length(data_file), ".", call. = FALSE)
}
# MD5 checksum of the input file, stored together with its base name
md5_in <- md5sum(data_file)
info_in <- data.frame(file = basename(names(md5_in)), checksum = md5_in, row.names = NULL)

Load data into R object

# Import the workbook into a data frame
imp_data <- read.xlsx(xlsxFile = data_file)
# Compact overview of the imported columns and their types
str(object = imp_data)
'data.frame':   479933 obs. of  11 variables:
 $ Sample      : chr  "FLT8-7" "FLT8-7" "FLT8-7" "FLT8-7" ...
 $ Angle       : chr  "35°" "35°" "35°" "35°" ...
 $ Task        : chr  "carving" "carving" "carving" "carving" ...
 $ Raw_material: chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Stroke      : num  1 1 1 1 1 1 1 1 1 1 ...
 $ Step        : num  1 2 3 4 5 6 7 8 9 10 ...
 $ Force       : num  -58.9 -59.2 -62.4 -59.6 -59.2 ...
 $ Friction    : num  -9.06 -8.84 -31.52 -28.64 -27.95 ...
 $ Depth       : num  5.57 5.59 5.72 5.75 5.58 ...
 $ Position    : num  65 66.4 97 155.4 211.3 ...
 $ Velocity    : num  -1.72e-04 6.87e+01 4.61e+02 5.94e+02 4.65e+02 ...

The imported file is: “~/analysis_ST/derived_data/TFE_inotec.xlsx”

Plot the selected numeric variable

Plots showing the strokes as lines

# Exclude the strokes with wrong/missing values:
#   FLT8-3   -> strokes 1032 - 2000
#   FLT8-4   -> strokes 243, 244 and 1921
#   LYDIT5-7 -> strokes 1997 - 2000
# NOTE(review): the rows are removed by hard-coded row index, which is only
# valid for this exact input file -- re-check the indices if the data change.
good_data <- imp_data[-c(410218:419907, 242382:242401, 259107:259116, 359872:359911), ]

# make sure the output directory exists before ggsave() writes into it
dir.create(dir_out, showWarnings = FALSE, recursive = TRUE)

# split the cleaned data into one data frame per sample
sp <- split(good_data, good_data[["Sample"]])

# For every sample: plot the depth per step, one line per stroke, as two
# panels (first 50 strokes | every 40th stroke) and save the figure as PDF.
for (i in seq_along(sp)) {
  # select every 40th stroke (1, 41, 81, ...) plus the last recorded stroke;
  # the sequence runs up to the highest stroke number (not the number of
  # strokes) so that gaps left by excluded strokes cannot shorten it
  last_stroke <- max(sp[[i]][["Stroke"]])
  seq_st <- c(seq(1, last_stroke, by = 40), last_stroke)
  dat_i_all <- sp[[i]] %>%
    filter(Stroke %in% seq_st)
  # depth range of the selected strokes, used as y-limits of both panels
  range_depth <- range(dat_i_all[["Depth"]])

  # right panel: every 40th stroke
  p1 <- ggplot(data = dat_i_all, aes(x = Step, y = Depth, colour = Stroke)) +
    geom_line(aes(group = Stroke), alpha = 0.3) +
    # no y-axis title here: the left panel (p2) already carries it
    labs(x = "Step", y = NULL) +
    # reverse the colour legend so that it starts with the first stroke
    scale_colour_continuous(trans = "reverse") +
    coord_cartesian(ylim = range_depth) +
    # label only selected step numbers on the x-axis
    scale_x_continuous(breaks = c(1, 4, 7, 10)) +
    theme_classic()

  # left panel: only the first 50 strokes of the sample
  dat_i_50 <- sp[[i]] %>%
    filter(Stroke %in% 1:50)
  p2 <- ggplot(data = dat_i_50) +
    geom_line(aes(x = Step, y = Depth, colour = Stroke, group = Stroke), alpha = 0.3) +
    labs(x = "Step", y = "Depth (mm)") +
    scale_colour_continuous(trans = "reverse") +
    coord_cartesian(ylim = range_depth) +
    scale_x_continuous(breaks = c(1, 4, 7, 10)) +
    theme_classic()

  # combine both panels with patchwork, titled with the sample name
  p <- p2 + p1 + plot_annotation(title = names(sp)[i])
  print(p)

  # save to PDF: <input file name>_plot_<sample>.pdf in dir_out
  file_out <- paste0(file_path_sans_ext(info_in[["file"]]), "_plot_",
                     names(sp)[i], ".pdf")
  ggsave(filename = file_out, plot = p, path = dir_out, device = "pdf")
}

Plots showing the relative penetration depths

Plot of all samples

# Relative depth reached within a set of depth measurements.
#
# x: numeric vector of depth values (here: all Depth readings of one sample).
# Returns a single number, the absolute difference between the largest and
# the smallest value of x. The result is NA if x contains NA.
rel.depth <- function(x) {
  # return the value visibly instead of as the side effect of an assignment
  abs(max(x) - min(x))
}

# Compute the relative depth (rel.depth) of the Depth column for every
# combination of Sample, Angle, Task and Raw_material; the result column is
# named Depth.rel.depth.
# NOTE(review): this summary is computed on imp_data, i.e. including the
# strokes that are excluded from good_data -- confirm that this is intended.
depth <- summaryBy(Depth ~ Sample + Angle + Task + Raw_material, 
                  data = imp_data, 
                  FUN = rel.depth)

# overview of the summary table
str(depth)
'data.frame':   24 obs. of  5 variables:
 $ Sample         : chr  "FLT8-1" "FLT8-10" "FLT8-11" "FLT8-12" ...
 $ Angle          : chr  "45°" "45°" "45°" "45°" ...
 $ Task           : chr  "cutting" "carving" "carving" "carving" ...
 $ Raw_material   : chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Depth.rel.depth: num  0.687 11.923 0.822 1.685 0.69 ...
# Colour palette: one colour per raw material, in the order of the factor levels
depth[["Raw_material"]] <- factor(depth[["Raw_material"]])
custom.col7 <- data.frame(
  type = levels(depth[["Raw_material"]]),
  col = c("#899DA4", "#DC863B")
)
# look up the colour of every row through the integer codes of the factor
depth[["col"]] <- custom.col7[["col"]][as.integer(depth[["Raw_material"]])]


# Plot the relative depth of all samples in one facet plot (one facet per
# task, angle on the x-axis, colour by raw material)
p3 <- ggplot(data = depth, aes(x = Angle, y = Depth.rel.depth, colour = Raw_material)) +
  geom_point() +
  # facet labels (tasks) are placed below the panels
  facet_wrap(~Task, strip.position = "bottom") +
  # avoid overplotting of the labels (sample IDs)
  geom_text_repel(aes(label = Sample), size = 2, nudge_x = -0.4,
                  segment.size = 0.1, force = 2, seed = 123) +
  # reversed y-axis: larger depths are drawn further down
  scale_y_continuous(trans = "reverse") +
  scale_x_discrete(position = "top") +
  # legend title without the "_" of the column name
  labs(y = "Relative depth (mm)", colour = "Raw material") +
  # colours are matched to the raw materials by name, so the assignment does
  # not depend on which materials are present in the plotted data
  scale_colour_manual(values = setNames(custom.col7$col, custom.col7$type)) +
  theme_classic()

print(p3)

# save to PDF: <input file name>_P3_depth_plot.pdf in dir_out
file_out <- paste0(file_path_sans_ext(info_in[["file"]]), "_P3_depth_plot", ".pdf")
ggsave(filename = file_out, plot = p3, path = dir_out, device = "pdf",
       width = 180, units = "mm")

Plot of all samples except the outlier (FLT8-10)

# FLT8-10 is treated as an outlier because its result is not comparable to
# the results of the other samples
bad_sample <- "FLT8-10"
# drop every row of the outlier sample from the cleaned data
good_data_outlier <- good_data[!(good_data[["Sample"]] %in% bad_sample), ]
# one data frame per remaining sample
sp_good <- split(good_data_outlier, good_data_outlier[["Sample"]])

# relative depth per Sample / Angle / Task / Raw_material combination,
# computed without the outlier
depth_good <- summaryBy(
  Depth ~ Sample + Angle + Task + Raw_material,
  data = good_data_outlier,
  FUN = rel.depth
)

# overview of the summary table
str(depth_good)
'data.frame':   23 obs. of  5 variables:
 $ Sample         : chr  "FLT8-1" "FLT8-11" "FLT8-12" "FLT8-2" ...
 $ Angle          : chr  "45°" "45°" "45°" "45°" ...
 $ Task           : chr  "cutting" "carving" "carving" "cutting" ...
 $ Raw_material   : chr  "Flint" "Flint" "Flint" "Flint" ...
 $ Depth.rel.depth: num  0.687 0.822 1.685 0.69 0.741 ...
# Plot the relative depth of all samples except the outlier in one facet
# plot (one facet per task, angle on the x-axis, colour by raw material)
p4 <- ggplot(data = depth_good, aes(x = Angle, y = Depth.rel.depth,
                                    colour = Raw_material)) +
  geom_point() +
  # facet labels (tasks) are placed below the panels
  facet_wrap(~Task, strip.position = "bottom") +
  # avoid overplotting of the labels (sample IDs)
  geom_text_repel(aes(label = Sample), size = 2,
                  nudge_x = -0.4, segment.size = 0.1, force = 2, seed = 123) +
  # reversed y-axis: larger depths are drawn further down
  scale_y_continuous(trans = "reverse") +
  scale_x_discrete(position = "top") +
  # legend title without the "_" of the column name
  labs(y = "Relative depth (mm)", colour = "Raw material") +
  # colours are matched to the raw materials by name; Raw_material is a plain
  # character column here, so unnamed colours would shift if a material were
  # missing from the filtered data
  scale_colour_manual(values = setNames(custom.col7$col, custom.col7$type)) +
  theme_classic()

print(p4)

# save to PDF: <input file name>_P4_depth_plot.pdf in dir_out
file_out <- paste0(file_path_sans_ext(info_in[["file"]]), "_P4_depth_plot", ".pdf")
ggsave(filename = file_out, plot = p4, path = dir_out, device = "pdf",
       width = 180, units = "mm")

The plots will be saved as PDF files in “~/analysis_ST/plots/”.


sessionInfo() and RStudio version

# Report the R version, platform, locale and package versions used for this run
sessionInfo()
R version 4.1.1 (2021-08-10)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 19043)

Matrix products: default

locale:
[1] LC_COLLATE=German_Germany.1252  LC_CTYPE=German_Germany.1252   
[3] LC_MONETARY=German_Germany.1252 LC_NUMERIC=C                   
[5] LC_TIME=German_Germany.1252    

attached base packages:
[1] tools     stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] ggrepel_0.9.1     doBy_4.6.13       patchwork_1.1.2   openxlsx_4.2.5   
 [5] R.utils_2.12.0    R.oo_1.25.0       R.methodsS3_1.8.2 forcats_0.5.2    
 [9] stringr_1.4.1     dplyr_1.0.10      purrr_0.3.4       readr_2.1.2      
[13] tidyr_1.2.1       tibble_3.1.8      ggplot2_3.3.6     tidyverse_1.3.2  

loaded via a namespace (and not attached):
 [1] httr_1.4.4           sass_0.4.2           jsonlite_1.8.0      
 [4] modelr_0.1.9         bslib_0.4.0          microbenchmark_1.4.9
 [7] assertthat_0.2.1     highr_0.9            googlesheets4_1.0.1 
[10] cellranger_1.1.0     yaml_2.3.5           pillar_1.8.1        
[13] backports_1.4.1      lattice_0.20-44      glue_1.6.2          
[16] digest_0.6.29        rvest_1.0.3          colorspace_2.0-3    
[19] htmltools_0.5.4      Matrix_1.5-1         pkgconfig_2.0.3     
[22] broom_1.0.1          haven_2.5.1          scales_1.2.1        
[25] tzdb_0.3.0           googledrive_2.0.0    generics_0.1.3      
[28] farver_2.1.1         ellipsis_0.3.2       cachem_1.0.6        
[31] withr_2.5.0          cli_3.4.0            magrittr_2.0.3      
[34] crayon_1.5.1         readxl_1.4.1         evaluate_0.16       
[37] fs_1.5.2             fansi_1.0.3          MASS_7.3-54         
[40] xml2_1.3.3           hms_1.1.2            gargle_1.2.1        
[43] lifecycle_1.0.2      munsell_0.5.0        reprex_2.0.2        
[46] zip_2.2.1            compiler_4.1.1       Deriv_4.1.3         
[49] jquerylib_0.1.4      rlang_1.0.5          grid_4.1.1          
[52] rstudioapi_0.14      labeling_0.4.2       rmarkdown_2.16      
[55] gtable_0.3.1         DBI_1.1.3            R6_2.5.1            
[58] lubridate_1.8.0      knitr_1.40           fastmap_1.1.0       
[61] utf8_1.2.2           stringi_1.7.8        Rcpp_1.0.9          
[64] vctrs_0.4.1          dbplyr_2.2.1         tidyselect_1.1.2    
[67] xfun_0.33           

RStudio version 1.4.1717.

Cite R packages used

# Print the citation of every package loaded by this script as plain text
# (without the BibTeX entry)
for (pkg in pack_to_load) {
  print(citation(pkg), bibtex = FALSE)
}

Wickham H, Averick M, Bryan J, Chang W, McGowan LD, François R,
Grolemund G, Hayes A, Henry L, Hester J, Kuhn M, Pedersen TL, Miller E,
Bache SM, Müller K, Ooms J, Robinson D, Seidel DP, Spinu V, Takahashi
K, Vaughan D, Wilke C, Woo K, Yutani H (2019). "Welcome to the
tidyverse." _Journal of Open Source Software_, *4*(43), 1686. doi:
10.21105/joss.01686 (URL: https://doi.org/10.21105/joss.01686).


To cite package 'R.utils' in publications use:

  Henrik Bengtsson (2022). R.utils: Various Programming Utilities. R
  package version 2.12.0. https://CRAN.R-project.org/package=R.utils


To cite package 'openxlsx' in publications use:

  Philipp Schauberger and Alexander Walker (2021). openxlsx: Read,
  Write and Edit xlsx Files. R package version 4.2.5.
  https://CRAN.R-project.org/package=openxlsx


The 'tools' package is part of R.  To cite R in publications use:

  R Core Team (2021). R: A language and environment for statistical
  computing. R Foundation for Statistical Computing, Vienna, Austria.
  URL https://www.R-project.org/.

We have invested a lot of time and effort in creating R, please cite it
when using it for data analysis. See also 'citation("pkgname")' for
citing R packages.


To cite package 'patchwork' in publications use:

  Thomas Lin Pedersen (2022). patchwork: The Composer of Plots. R
  package version 1.1.2. https://CRAN.R-project.org/package=patchwork


To cite package 'doBy' in publications use:

  Søren Højsgaard and Ulrich Halekoh (2022). doBy: Groupwise
  Statistics, LSmeans, Linear Estimates, Utilities. R package version
  4.6.13. https://CRAN.R-project.org/package=doBy

ATTENTION: This citation information has been auto-generated from the
package DESCRIPTION file and may need manual editing, see
'help("citation")'.


To cite package 'ggrepel' in publications use:

  Kamil Slowikowski (2021). ggrepel: Automatically Position
  Non-Overlapping Text Labels with 'ggplot2'. R package version 0.9.1.
  https://CRAN.R-project.org/package=ggrepel


To cite ggplot2 in publications, please use:

  H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
  Springer-Verlag New York, 2016.

END OF SCRIPT